Location of the 8000ers

Map showing the location of the 14 eight-thousanders. Source: www.dreamwanderlust.com

Read data from web, clean and combine

Specify the Wikipedia page titles (the mountain names are scraped from Wikipedia further down)

# Titles of the Wikipedia pages to scrape; each one is handed to
# get_wiki_tables(), which sends it as the `page` query parameter.
wiki_8000er_deaths <- "List of deaths on eight-thousanders"
# NOTE(review): "800er" looks like a typo for "8000er"; the name is kept
# because it is referenced further down.
wiki_800er <- "Eight-thousander"
wiki_everest <- "List of people who died climbing Mount Everest"

# eightthousander_names <- c("Everest", "K2", "Kangchenjunga", "Lhotse",
#                      "Makalu", "Cho Oyu", "Dhaulagiri I", "Manaslu",
#                      "Nanga Parbat", "Annapurna I", "Gasherbrum I",
#                      "Broad Peak", "Gasherbrum II", "Shishapangma")
# 

Define function for extracting tables from Wikipedia page of interest and function for converting an html table to a data frame.

# Fetch a Wikipedia page through the api.php "parse" action and return the
# elements carrying the "wikitable" CSS class.
#
# title: title of the Wikipedia page, e.g. "Eight-thousander".
# Returns: the result of html_nodes() for the selector ".wikitable"
#   (may be empty when the page has no such tables).
# Errors: stops with the HTTP status if the request itself fails.
get_wiki_tables <- function(title){
  wiki_api <- "https://en.wikipedia.org/w/api.php"

  params <- list(action = "parse",
                 page = title,
                 format = "xml")

  webdata <- GET(url = wiki_api, query = params)
  # Fail early with the HTTP status instead of an obscure parsing error
  # further down when the request did not succeed.
  stop_for_status(webdata)
  webdata_xml <- content(webdata)

  # The text content of the XML response is parsed as the page's HTML.
  page_html <- read_html(xml_text(webdata_xml))
  html_nodes(x = page_html, css = ".wikitable")
}

# Parse a wikitable HTML node with html_table() and normalise the column
# names of the result with clean_names().
convert_table <- function(wikitable){
  parsed <- html_table(wikitable)
  clean_names(parsed)
}

Create data frame of deaths on all 8000ers apart from Everest

# Scrape the first wikitable of the "Eight-thousander" page; it supplies the
# mountain names, heights and ascent counts used in the rest of the analysis.
eightthousanders <- get_wiki_tables(wiki_800er)

eightthou_summary <- html_table(eightthousanders[1])[[1]] %>%
  clean_names()

# The real column headers are stored in the first data row of the table.
colnames(eightthou_summary) <- eightthou_summary %>%
  slice(1) %>%
  unlist(use.names = FALSE)

eightthou_summary_cleaned <- eightthou_summary %>%
  janitor::clean_names() %>%
  # drop the row that was just promoted to the header
  slice(-1) %>%
  mutate(
    # add info re mountain range.
    # Nanga Parbat lies in Pakistan but belongs to the Himalayas, so it has
    # to be handled before the country-based Karakoram rule.
    range = case_when(
      str_detect(name, "Nanga Parbat") ~ "Himalayas",
      str_detect(country, "Pakistan") ~ "Karakoram",
      TRUE ~ "Himalayas"
    ),
    # fix height info: keep the "8,xxx" figure and drop the comma.
    # NOTE(review): `height_24` presumably carries a footnote number from the
    # scraped header and may change when the page is edited - confirm.
    height = as.integer(
      str_remove(
        str_extract(height_24, "8,[:digit:]{3}"),
        ","
      )
    )
  )

eightthou_names <- pull(eightthou_summary_cleaned, name)

# get all tables from deaths on 8000ers Wikipedia page
mountains2_14 <- get_wiki_tables(wiki_8000er_deaths)

# convert html to df; creates a list of dfs
mountains2_14_dfs <- mountains2_14 %>%
  map(convert_table)

# specify names using the list of mountains from other Wikipedia page
# NOTE(review): this assumes the tables on the deaths page appear in the same
# order as the mountains in the summary table - confirm.
non_everest_names <- eightthou_names[2:14]

# A changed number of tables (or fewer than 14 scraped names) would attach
# missing or wrong mountain labels, so stop instead.
if (length(mountains2_14_dfs) != length(non_everest_names) ||
    anyNA(non_everest_names)) {
  stop("Expected ", length(non_everest_names),
       " named deaths tables (one per non-Everest eight-thousander) but found ",
       length(mountains2_14_dfs), " tables.",
       call. = FALSE)
}
names(mountains2_14_dfs) <- non_everest_names

# add mountain names prior to joining dfs: imap() passes each data frame
# together with its list name
mountains2_14_dfs <- imap(
  mountains2_14_dfs,
  function(deaths, mountain_name) mutate(deaths, mountain = mountain_name)
)

# reduce list to a single df; bind_rows() accepts the whole list at once
deaths_no_everest <- mountains2_14_dfs %>%
  bind_rows() %>%
  select(-x, -references)

Add data from Everest

# First table of the Everest deaths page, reduced to the listed columns and
# tagged with the mountain name.
everest_deaths <- get_wiki_tables(wiki_everest)[[1]] %>%
  convert_table() %>%
  select(date, name, nationality, cause_of_death) %>%
  mutate(mountain = "Everest")

# Stack the Everest rows on top of the rows for the other mountains.
all_8000er_deaths <- bind_rows(
  everest_deaths,
  deaths_no_everest
)

# save to disk just in case
# saveRDS(all_8000er_deaths, file = "all_8000er_deaths.rds")

Initial overview

Raw count of deaths on all 14 8000ers.

# Horizontal bar chart of the number of recorded deaths per mountain,
# with the mountains ordered by that count.
count(all_8000er_deaths, mountain) %>%
  mutate(mountain = fct_reorder(mountain, n)) %>%
  ggplot(aes(x = n, y = mountain)) +
  geom_col(fill = "lightseagreen") +
  theme_light() +
  xlab("Number of deaths") +
  ylab("") +
  ggtitle("Deaths on 8000ers")

However, this is potentially misleading, as it doesn’t take into account how many people climb each of these mountains. Full (and up-to-date) stats on the number of ascents aren’t that easy to find (at least in a way that is automatically accessible), but we can approximate the relative proportions using data from the “Eight-thousander” Wikipedia page (from which we also sourced the mountain names). This page lists the total number of ascents between 1950 and 2012 for each mountain; we make the assumption here that the relative proportion has remained similar since 2012.

# Ascent counts per mountain plus each mountain's share of all ascents.
eightthou_ascents <- eightthou_summary_cleaned %>%
  select(name, height, total_ascents_c, range) %>%
  mutate(
    # The counts are scraped as text; strip thousands separators first so a
    # value such as "5,656" is not turned into NA by as.integer() (one NA
    # would make every proportion NA).
    total_ascents_c = as.integer(gsub(",", "", total_ascents_c, fixed = TRUE)),
    prop_ascents = total_ascents_c / sum(total_ascents_c)
  )

The proportions are interesting in and of themselves

# Share of ascents per mountain, bars coloured by mountain range and
# ordered by that share.
eightthou_ascents %>%
  mutate(name = fct_reorder(name, prop_ascents)) %>%
  ggplot(aes(x = prop_ascents, y = name, fill = range)) +
  geom_col() +
  theme_light() +
  scale_fill_brewer(palette = "Dark2") +
  xlab("Proportion of ascents (1950-2012)") +
  ylab("") +
  labs(fill = "Range") +
  ggtitle("Which 8000ers are climbed the most?")

Compare this to the height of each mountain

# Height of each mountain; point size encodes the share of ascents and
# colour the mountain range. The size legend is suppressed.
eightthou_ascents %>%
  mutate(name = fct_reorder(name, height)) %>%
  ggplot(aes(x = height, y = name, colour = range, size = prop_ascents)) +
  geom_point() +
  theme_light() +
  guides(size = "none") +
  scale_colour_brewer(palette = "Dark2") +
  xlab("Height (m)") +
  ylab("") +
  labs(colour = "Range") +
  ggtitle("Higher ≠ more ascents", subtitle = "(except for Everest)")

Now use the info about ascents to contextualise the number of deaths on each mountain.

# Share of deaths divided by share of ascents for each mountain, drawn as a
# lollipop chart around the reference value 1 (dashed line).
count(all_8000er_deaths, mountain) %>%
  mutate(prop_deaths = n / sum(n)) %>%
  inner_join(eightthou_ascents, by = c("mountain" = "name")) %>%
  mutate(deaths_weighted = prop_deaths / prop_ascents) %>%
  mutate(mountain = fct_reorder(mountain, deaths_weighted)) %>%
  ggplot(aes(x = deaths_weighted, y = mountain, colour = range)) +
  geom_point(size = 2) +
  geom_segment(aes(xend = 1, yend = mountain)) +
  geom_vline(xintercept = 1, linetype = 2) +
  theme_light() +
  scale_colour_brewer(palette = "Dark2") +
  xlab("Ratio of deaths to ascents") +
  ylab("") +
  labs(colour = "Range") +
  ggtitle("Which 8000ers are deadliest?")

Leading causes of death on 8000ers

# Split each cause-of-death text into single words (one row per word) and
# drop the words listed in `stop_words`.
unigrams <- all_8000er_deaths %>%
  mutate(index = row_number()) %>%
  unnest_tokens(cause_unigrams, cause_of_death) %>%
  anti_join(stop_words, by = c("cause_unigrams"="word"))

# Frequency of each remaining word, most frequent first.
causes <- count(unigrams, cause_unigrams, sort = TRUE)


# Table of the word counts
datatable(
  causes,
  caption = "Cause of death (unigrams)",
  colnames = c("Unigram","Count")
)
# Word cloud of the same counts
wordcloud(
  words = causes$cause_unigrams,
  freq = causes$n,
  colors = brewer.pal(8,"Dark2")
)

Do these differ per mountain?

# Word counts per mountain: one row per (word, mountain) pair.
# count() already returns its result without the grouping it adds, so no
# group_by()/ungroup() round trip is needed.
cause_per_mountain <- unigrams %>%
  count(cause_unigrams, mountain)

# function to create a wordcloud for individual mountains
#
# mname:  mountain name; rows are kept where `mountain == mname`.
# counts: data frame with the columns `cause_unigrams`, `mountain` and `n`.
#         Defaults to the global `cause_per_mountain`, so existing calls
#         with a single argument behave as before.
# Draws the word cloud as a side effect.
mountain_cloud <- function(mname, counts = cause_per_mountain){
  m <- counts %>%
    filter(mountain == mname)

  wordcloud(words = m$cause_unigrams, freq = m$n,
            colors = brewer.pal(8,"Dark2"))
}

The three deadliest peaks: Annapurna I, K2, Nanga Parbat

# One word cloud per peak, in the order given in the heading
for (peak in c("Annapurna I", "K2", "Nanga Parbat")) {
  mountain_cloud(peak)
}

The reference to “Taliban” in the word cloud for Nanga Parbat reflects the 2013 Nanga Parbat massacre, in which 11 people (10 climbers and a local guide) were killed at base camp by Taliban attackers. The attack was retaliation for a US drone strike that killed a Taliban commander.

# Rows whose cause of death mentions "Taliban" (either capitalisation of the T)
filter(
  all_8000er_deaths,
  str_detect(cause_of_death,"[Tt]aliban")
)
## # A tibble: 11 × 5
##    date         name                nationality     cause_of_death      mountain
##    <chr>        <chr>               <chr>           <chr>               <chr>   
##  1 22 June 2013 Igor Svergun        Ukraine         Killed by Taliban … Nanga P…
##  2 22 June 2013 Badawi Kashaev      Ukraine         Killed by Taliban … Nanga P…
##  3 22 June 2013 Dmitry Konyaev      Ukraine         Killed by Taliban … Nanga P…
##  4 22 June 2013 Rao Jianfeng        China           Killed by Taliban … Nanga P…
##  5 22 June 2013 Yang Chunfeng       China           Killed by Taliban … Nanga P…
##  6 22 June 2013 Honglu Chen         China (USA/Chi… Killed by Taliban … Nanga P…
##  7 22 June 2013 Sona Sherpa         Nepal           Killed by Taliban … Nanga P…
##  8 22 June 2013 Ernestas Markšaitis Lithuania       Killed by Taliban … Nanga P…
##  9 22 June 2013 Ali Hussain         Pakistan        Killed by Taliban … Nanga P…
## 10 22 June 2013 Anton Dobes         Slovakia        Killed by Taliban … Nanga P…
## 11 22 June 2013 Peter Sperka        Slovakia        Killed by Taliban … Nanga P…

The three least-deadly peaks: Cho Oyu, Gasherbrum II, Everest

# One word cloud per peak, in the order given in the heading
for (peak in c("Cho Oyu", "Gasherbrum II", "Everest")) {
  mountain_cloud(peak)
}

Deadliest years

# Number of deaths per mountain and year; the year is the first run of four
# digits found in the date text.
deaths_per_year <- all_8000er_deaths %>%
  mutate(year = str_extract(date,"[:digit:]{4}")) %>%
  mutate(year = as.integer(year)) %>%
  count(mountain, year, sort = TRUE)


# Deaths per year from 2000 onwards, one line per mountain; only the four
# mountains named in the gghighlight() call are highlighted.
p <- filter(deaths_per_year, year >= 2000) %>%
  ggplot(aes(x = year, y = n, colour = mountain)) +
  geom_point() +
  geom_line() +
  gghighlight(mountain %in% c("Everest","K2", "Manaslu", "Nanga Parbat")) +
  theme_light() +
  scale_colour_brewer(palette = "Dark2") +
  xlab("Year") +
  ylab("Number of deaths") +
  ggtitle("Deadliest years since 2000")
## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...

## Warning: Tried to calculate with group_by(), but the calculation failed.
## Falling back to ungrouped filter operation...
## label_key: mountain
# Turn the ggplot object `p` into a plotly figure. The warnings below show that
# plotly has not implemented the GeomLabelRepel geom, so that layer is not converted.
ggplotly(p)
## Warning in geom2trace.default(dots[[1L]][[4L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[4L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

## Warning in geom2trace.default(dots[[1L]][[4L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

## Warning in geom2trace.default(dots[[1L]][[4L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

Everest and K2

# Yearly deaths on Everest from 1980 onwards, with text labels placed next
# to selected years.
filter(deaths_per_year, mountain == "Everest", year >= 1980) %>%
  ggplot(aes(x = year, y = n)) +
  geom_point() +
  geom_line() +
  theme_light() +
  annotate("text", x = 1996, y = 16.5, label = "'Into Thin Air' disaster\n(storm)") +
  annotate("text", x = 2006, y = 12, label = "Multiple incidents") +
  annotate("text", x = 2015, y = 18, label = "Nepal earthquake") +
  annotate("text", x = 2019, y = 13.5, label = "Over-\ncrowding") +
  xlab("Year") +
  ylab("Number of deaths") +
  ggtitle("Deaths on Everest since 1980")

# Yearly deaths on K2 from 1980 onwards, with text labels placed next to
# selected years; the y axis is fixed to 0-15.
filter(deaths_per_year, mountain == "K2", year >= 1980) %>%
  ggplot(aes(x = year, y = n)) +
  geom_point() +
  geom_line() +
  ylim(0, 15) +
  theme_light() +
  annotate("text", x = 1986, y = 14.5, label = "Storm\n(+ other incidents)") +
  annotate("text", x = 1995, y = 9, label = "Storm") +
  annotate("text", x = 2008, y = 12, label = "Serac collapse") +
  annotate("text", x = 2021, y = 7, label = "Winter\ndeaths") +
  xlab("Year") +
  ylab("Number of deaths") +
  ggtitle("Deaths on K2 ('The Savage Mountain') since 1980")

  # annotate("text", x = 2019, y = 13.5, label = "Over-\ncrowding")